#################################################################
# Set up workspace

### Clear global environment
rm(list=ls())

# Create path
pth <- "~/Dropbox/Projects/Dissertation/"

### Load libraries
# Libraries
loadPkg <- function(toLoad){
  for(lib in toLoad){
    if(! lib %in% installed.packages()[,1])
    { install.packages(lib, repos='http://cran.rstudio.com/') }
    suppressMessages( library(lib, character.only=TRUE) )
  }
}

toLoad <- c(
  'MASS', 'magrittr', 'tidyverse', 'plyr', 'stargazer', 'data.table', 'fmsb', 'glmnet', 'ggplot2', 'interplot', 'effects',
  'grid', 'gridBase', 'gridExtra', 'dominanceanalysis', 'DescTools', 'abmisc', 'pscl', 'emIRT', 'parallel', 'wnominate', 'scales',
  'ggeffects', 'randomForest', 'RColorBrewer', 'sjPlot', 'corrplot', 'car', 'parameters', 'scales'
)
loadPkg(toLoad)


### Function to compute effect sizes from sjPlot objects and vectors of 1 IV and the DV
effSize <- function(df, ivName, dvName, parms, data, cutOutliers=TRUE){
  # Coerce data object to data frame
  dat <- as.data.frame(data)
  
  # Pull iv and dv from df
  iv <- df[,ivName]
  dv <- df[,dvName]
  
  # Cut NAS from IV/DV
  iv <- iv[!is.na(iv)]
  dv <- dv[!is.na(dv)]
  
  # Cut out outliers in legislative behavior
  if(cutOutliers){
    if(ivName=='PartyDonorsPrimary'){
      dv <- dv[-outliers(dv)]
    } else {
      iv <- iv[-outliers(iv)]
    }
  }
  
  # SDs of IV and DV
  ivSD <- sd(iv)
  dvSD <- sd(dv)
  
  # Loop through combinations to compute change in DV from 1 SD change in IV for each
  # And then the amount change in terms of SDs of the DV
  fullEff <- c()
  fullEffSD <- c()
  sdEff <- c()
  sdEffSD <- c()
  
  for(ii in 1:nrow(parms)){
    # Pull data points for this maj/freshman combo
    preds <- filter(dat, group==parms$group[ii], facet==parms$facet[ii])
    # Figure out the scale of the IV
    ivScale <- preds$x[2]-preds$x[1]
    # Change in DV over range of IV
    delta <- preds$predicted[nrow(preds)]-preds$predicted[1]
    # Change in DV for a 1 SD change in the IV (or a 0 to 200 change in party support)
    if(ivName=='PartyDonorsPrimary'){
      deltaSD <- preds$predicted[preds$x==200] - preds$predicted[preds$x==0]
    } else {
      deltaSD <- ((preds$predicted[2]-preds$predicted[1])/ivScale)*ivSD
    }
    
    # Put changes in terms of SD of the DV
    sdChange <- delta/dvSD
    sdChangeSD <- deltaSD/dvSD
    
    # Add to vectors
    fullEff <- c(fullEff, delta)
    fullEffSD <- c(fullEffSD, sdChange)
    sdEff <- c(sdEff, deltaSD)
    sdEffSD <- c(sdEffSD, sdChangeSD)
    
    # Name this element of each list according to the corresponding maj/freshman combo
    name <- paste0(parms$group[ii], '_', parms$facet[ii]) %>% str_replace(., ' ', '')
    names(fullEff)[ii] <- names(fullEffSD)[ii] <- names(sdEff)[ii] <- names(sdEffSD)[ii] <- name 
  }
  
  # Make into a single list
  res <- list(fullEff=fullEff, fullEffSD=fullEffSD, sdEff=sdEff, sdEffSD=sdEffSD)
  
  # Return
  return(res)
}


# Write a function to get predicted data in paper 2 (8_multivariateAnalysis.R and 10_appendix.R)
predDat <- function(model, vars, data, std=FALSE){
  # Get the predicted data from plot_model
  p <- plot_model(model, type='pred', terms=vars)
  
  # Pull out the predicted data
  dat <- as.data.frame(p$data)
  
  # Pull out low, median, and high values of facet and group
  if(d(vars)==3 & any(!is.na(str_extract(dat$facet, '0\\.[0-9]+')))){
    dat$facet <- str_extract(dat$facet, '-[0-9]{1}\\.[0-9]+|[0-9]{1}\\.[0-9]+')
    facetVals <- dat$facet %>% unique() %>% num() %>% sort()
    dat %<>% mutate(facet=num(facet))
    dat$facet[dat$facet==facetVals[1]] <- 'Low'
    dat$facet[dat$facet==facetVals[2]] <- 'Median'
    dat$facet[dat$facet==facetVals[3]] <- 'High'
    dat %<>% mutate(facet = factor(facet, levels=c('Low', 'Median', 'High')))
  }
  if('Yes' %in% unique(dat$group)){
    dat %<>% mutate(group=as.numeric(group)-1)
  }
  if('Yes' %in% unique(dat$facet)){
    dat %<>% mutate(facet=as.numeric(facet)-1)
  }
  groupVals <- sort(num(unique(dat$group)))
  
  if(d(groupVals)==3){
    # Rename values of +/- 1 SD for plotting
    dat %<>% mutate(group=num(group))
    dat$group[dat$group==groupVals[1]] <- 'Low'
    dat$group[dat$group==groupVals[2]] <- 'Median'
    dat$group[dat$group==groupVals[3]] <- 'High'
    
    dat %<>%
      mutate(group = factor(group, levels=c('Low', 'Median', 'High'))) 
  } else {
    dat <- dat %>%
      mutate(group=car::recode(group, "0='No'; 1='Yes'")) %>%
      mutate(group=factor(group, levels=c('No', 'Yes'))) # For priorities, "group" is "minority priority"
  }
  
  
  # Map x-axis values (+/- 1 SD from the median if SD=TRUE, but defaults to min/max)
  if(std){
    xVar <- str_replace(vars[1], ' \\[all\\]', '')
    xMed <- median(data[,xVar], na.rm=TRUE)
    xSD <- sd(data[,xVar], na.rm=TRUE)
    xLo <- xMed-xSD
    xHi <- xMed+xSD
    matchVal <- function(vec, val){which.min(abs(vec-val))}
    xVals <- sapply(c(xLo, xMed, xHi), function(x){ dat$x[matchVal(dat$x, x)] })
    if(any(duplicated(xVals))){
      xVals[which(duplicated(xVals)):d(xVals)] <- xVals[which(duplicated(xVals)):d(xVals)] + xVals[which(duplicated(xVals))-1]
    }
  } else {
    xVals <- c(min(dat$x), max(dat$x))
  }
  
  # Filter to xVals for the x axis
  dat <- filter(dat, x %in% xVals)
  if(d(unique(dat$x))==3){
    dat$x[dat$x==min(dat$x)] <- 'Low'
    dat$x[dat$x==min(dat$x)] <- 'High'
    dat$x[!dat$x %in% c('Low', 'High')] <- 'Median'
  }
  
  # Rename facet and group variables
  dat %<>% dplyr::rename(!!vars[2]:=group)
  if(d(vars)==3){ dat %<>% dplyr::rename(!!vars[3]:=facet) }
  
  # Fix any probabilities that aren't in range
  dat$predicted[dat$predicted < 0] <- 0
  dat$predicted[dat$predicted > 1] <- 1
  dat$conf.low[dat$conf.low < 0] <- 0
  dat$conf.high[dat$conf.high > 1] <- 1
  
  # Rename to match plotting
  dat %<>%
    dplyr::rename(lo=conf.low) %>%
    dplyr::rename(hi=conf.high)
  if(d(vars)==2 & 'minSD' %in% names(dat)){
    dat %<>% dplyr::rename(majSD=x)
  } else if(d(vars)==2 & 'majSD' %in% names(dat)){
    dat %<>% dplyr::rename(minSD=x)
  }
  
  # Cut columns
  dat %<>% dplyr::select(-c(std.error, group_col))
  
  # Return
  return(dat)
  
}


# Function for predicted probabilities plot
predProbPlot <- function(pDat, breaks, ylab, xlab, ylim, mains, rowControl, legTitle, var){
  if(!is.null(rowControl)){
    par(mfrow=rowControl)
  }
  cols <- brewer.pal(6, 'Greys') %>% .[c(4:6)]
  
  pDatNo <- filter(pDat, x==min(pDat$x))
  breaks <- breaks
  plot(0:(nrow(pDatNo)+2*breaks-1), 0:(nrow(pDatNo)+2*breaks-1), type='n', 
       ylab=ylab, xlab=xlab, 
       ylim=ylim, xaxt='n', las=2, main=mains[1])
  for(ii in 1:nrow(pDatNo)){
    colIndex <- which(c('Low', 'Median', 'High')==pDatNo[ii,paste0('maj', var)])
    if(ii %in% 1:3){
      points(ii, pDatNo$predicted[ii], col=cols[colIndex], pch=16, cex=2)
      lines(x=rep(ii, 2), y=c(pDatNo$lo[ii], pDatNo$hi[ii]), col=cols[colIndex], lwd=3)
    } else if(ii %in% 4:6){
      points(ii+2, pDatNo$predicted[ii], col=cols[colIndex], pch=16, cex=2)
      lines(x=rep(ii+2, 2), y=c(pDatNo$lo[ii], pDatNo$hi[ii]), col=cols[colIndex], lwd=3)
    } else if(ii %in% 7:9) {
      points(ii+4, pDatNo$predicted[ii], col=cols[colIndex], pch=16, cex=2)
      lines(x=rep(ii+4, 2), y=c(pDatNo$lo[ii], pDatNo$hi[ii]), col=cols[colIndex], lwd=3)
    }
  }
  axis(side=1, at=c(2, 7, 12), labels=c('Low', 'Median', 'High'), las=1, tick=FALSE)
  legend(x='topright', legend=c('Low', 'Median', 'High'), fill=cols, bty='n', title=legTitle)
  
  pDatYes <- filter(pDat, x==max(pDat$x))
  plot(0:(nrow(pDatYes)+2*breaks-1), 0:(nrow(pDatYes)+2*breaks-1), type='n', 
       ylab=ylab, xlab=xlab, 
       ylim=ylim, xaxt='n', las=2, main=mains[2])
  for(ii in 1:nrow(pDatYes)){
    colIndex <- which(c('Low', 'Median', 'High')==pDatNo[ii,paste0('maj', var)])
    if(ii %in% 1:3){
      points(ii, pDatYes$predicted[ii], col=cols[colIndex], pch=16, cex=2)
      lines(x=rep(ii, 2), y=c(pDatYes$lo[ii], pDatYes$hi[ii]), col=cols[colIndex], lwd=3)
    } else if(ii %in% 4:6){
      points(ii+2, pDatYes$predicted[ii], col=cols[colIndex], pch=16, cex=2)
      lines(x=rep(ii+2, 2), y=c(pDatYes$lo[ii], pDatYes$hi[ii]), col=cols[colIndex], lwd=3)
    } else if(ii %in% 7:9) {
      points(ii+4, pDatYes$predicted[ii], col=cols[colIndex], pch=16, cex=2)
      lines(x=rep(ii+4, 2), y=c(pDatYes$lo[ii], pDatYes$hi[ii]), col=cols[colIndex], lwd=3)
    }
  }
  axis(side=1, at=c(2, 7, 12), labels=c('Low', 'Median', 'High'), las=1, tick=FALSE)
  legend(x='topleft', legend=c('Low', 'Median', 'High'), fill=cols, bty='n', title='Majority Spread')
  
  if(!is.null(rowControl)){
    par(mfrow=c(1,1))
  }
}



##################################################################
# Data

### Load bills data, clean

# Load
bills <- fread(file = '~/Dropbox/Projects/Minority Party/Data/allBillsMerged_97-114_Final.csv', 
               stringsAsFactors = FALSE, data.table = FALSE)

# Global for unique congresses in bills sample
congs <- unique(bills$congress) %>% sort()

# Create election year variable from days.to.elec
bills$elec.year <- ifelse(bills$days.to.elec < 365, yes = 1, no = 0)

# Create filter for private bills
aw <- fread(paste0(pth, 'Data/bills93-114.csv'), stringsAsFactors = FALSE) %>%
  filter(., Cong %in% congs)
private <- filter(aw, Private==1) %>% mutate(., billnumber=paste0(Cong, BillType, BillNum)) %>% pull(billnumber)
bills$private <- ifelse(bills$id %in% private , yes=1, no=0)
rm(aw)

# Need the absolute value of the difference between members' unity scores and their party
bills$absUnityPtyDiff <- abs(bills$unityPtyDiff)

# Let's also grab a dummy variable for whether members are more extreme than their party median
bills$extreme <- ifelse( (bills$DW_dist_pty < 0 & bills$party== 0) |
                           (bills$DW_dist_pty > 0 & bills$party== 1), yes = 1, no = 0)
bills$extreme_interaction <- bills$absDW_dist_pty * bills$extreme

# Finally, an absolute value of distPresNorm to give us a better "competitiveness" metric
bills$absDistPresNorm <- abs(bills$distPresNorm)

# Make an indicator for substantive bills
bills$substantive <- 1
bills$substantive[bills$private==1] <- 0
bills$substantive[bills$impbill==0] <- 0
bills$substantive[bills$major==99] <- 0

# Fix relSen NAs
bills$relSen[is.na(bills$relSen)] <- 0

# Fix csDiffNS NAs (IS THIS RIGHT TO DO?)
bills$csDiffNS[is.na(bills$csDiffNS)] <- 0

# Make another competitiveness variable (absolute value of PVI)
bills$absPVI <- abs(bills$district_pvi)

# Absolute value of the difference between the sponsor dwnom and the mean of the cosponsors
bills$absCSDiff <- abs(bills$csDiffNS)

# Distance between mean of cosponsors dwnoms (*not* including sponsor) and majority party median (interact with difference between sponsor and majority party?)
bills$csMajDiff <- abs(bills$csDiffNS - bills$medDW_maj)

# Create election year variable from days.to.elec
bills$elec.year <- ifelse(bills$days.to.elec < 365, yes = 1, no = 0)


### Load amendments data, drop non R/D party
amdt <- fread(file = '~/Dropbox/Projects/Minority Party/Data/allAmendments_97-112_Final.csv', stringsAsFactors = FALSE, data.table = FALSE) %>%
  filter(., party!=328)
